import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the dataset (adjust the file path and separator for your environment).
veri_seti = pd.read_csv("common_dataset_touch_features_offset.csv", sep=';')

# Label-encode each categorical column with its OWN encoder.
# BUG FIX: the original reused a single LabelEncoder and called
# fit_transform() on it four times; each call overwrites the fitted
# classes, so the mapping for all but the last column was lost and
# inverse_transform() could never recover the original categories.
label_encoders = {}
for sutun in ("touch", "finger", "palm", "fist"):
    encoder = LabelEncoder()
    veri_seti[sutun] = encoder.fit_transform(veri_seti[sutun])
    label_encoders[sutun] = encoder  # kept so categories can be decoded later

# Persist the encoded dataset for the downstream standardization step.
veri_seti.to_csv('touchstandard.csv', index=False)

# Show the encoded frame.
print(veri_seti)
user_id touch_type touch finger palm fist 1 2 3 4 ... 985 \
0 0 0 1 1 1 1 4 0 12 -16 ... -8
1 0 0 1 1 1 1 -52 -2 17 1 ... 1
2 0 0 1 1 1 1 2 7 14 -5 ... -8
3 0 0 1 1 1 1 0 6 10 8 ... 49
4 0 0 1 1 1 1 2 5 3 0 ... 3
... ... ... ... ... ... ... .. .. .. .. ... ...
2051 29 9 0 1 0 1 0 4 -10 3 ... 22
2052 29 9 0 1 0 1 8 -3 -13 -3 ... 16
2053 29 9 0 1 0 1 8 -9 -12 -16 ... 74
2054 29 9 0 1 0 1 56 0 0 -20 ... 75
2055 29 0 1 1 1 1 11 -1 -12 2 ... 52
986 987 988 989 990 991 992 993 994
0 57 -20 -52 20 55 15 -4 -18 -3
1 59 -17 -45 44 17 -4 3 1 -40
2 56 23 -51 21 33 40 53 -51 -4
3 4 2 -41 1 3 -8 3 2 -40
4 4 4 3 -10 32 10 54 -50 -50
... ... ... ... ... ... ... ... ... ...
2051 -43 56 10 9 9 17 8 70 61
2052 21 4 46 -7 -25 -6 -38 18 57
2053 -35 38 69 -4 -38 8 -40 65 64
2054 -37 38 68 -2 -37 6 -39 67 64
2055 -54 46 -2 2 -11 6 0 61 51
[2056 rows x 1000 columns]
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Read the label-encoded dataset produced by the previous step.
frame = pd.read_csv('touchstandard.csv')

# Rescale every column to zero mean and unit variance.
olcekleyici = StandardScaler()
scaled_values = olcekleyici.fit_transform(frame)

# Wrap the scaled ndarray back into a DataFrame with the original headers.
scaled_frame = pd.DataFrame(scaled_values, columns=frame.columns)

# Write the standardized data to a new CSV file.
scaled_frame.to_csv('standardize_edilmis_veri.csv', index=False)
import pandas as pd

# Load the standardized dataset.
standardized = pd.read_csv('standardize_edilmis_veri.csv')

# Compute per-column summary statistics (count, mean, std, quartiles, min/max)
# and print them to the console.
print(standardized.describe())
user_id touch_type touch finger palm \
count 2056.000000 2.056000e+03 2.056000e+03 2.056000e+03 2.056000e+03
mean 0.000000 -1.382379e-17 -7.948678e-17 -8.207875e-17 -1.261421e-16
std 1.000243 1.000243e+00 1.000243e+00 1.000243e+00 1.000243e+00
min -1.611940 -7.219725e-01 -1.119994e+00 -2.628249e+00 -2.254516e+00
25% -0.814201 -7.219725e-01 -1.119994e+00 3.804815e-01 4.435543e-01
50% -0.016463 -7.219725e-01 8.928621e-01 3.804815e-01 4.435543e-01
75% 0.895239 5.855593e-01 8.928621e-01 3.804815e-01 4.435543e-01
max 1.692978 2.219974e+00 8.928621e-01 3.804815e-01 4.435543e-01
fist 1 2 3 4 \
count 2.056000e+03 2.056000e+03 2.056000e+03 2.056000e+03 2.056000e+03
mean 4.838326e-17 -2.764758e-17 2.764758e-17 -1.382379e-17 -1.382379e-17
std 1.000243e+00 1.000243e+00 1.000243e+00 1.000243e+00 1.000243e+00
min -2.355371e+00 -1.811150e+00 -2.978362e+00 -2.421759e+00 -2.379373e+00
25% 4.245616e-01 -2.220333e-01 -7.372157e-01 -7.421149e-01 -6.695177e-01
50% 4.245616e-01 -8.960684e-02 -3.685738e-02 4.171878e-02 9.041792e-02
75% 4.245616e-01 1.234658e+00 8.035726e-01 6.016000e-01 7.553616e-01
max 4.245616e-01 1.684907e+00 3.044719e+00 2.505196e+00 2.370225e+00
... 985 986 987 988 \
count ... 2.056000e+03 2.056000e+03 2.056000e+03 2.056000e+03
mean ... -2.764758e-17 -6.911894e-18 5.529515e-17 3.455947e-18
std ... 1.000243e+00 1.000243e+00 1.000243e+00 1.000243e+00
min ... -2.059001e+00 -2.055446e+00 -2.210669e+00 -1.725556e+00
25% ... -4.191911e-01 -3.402519e-01 -7.817350e-01 -9.361703e-01
50% ... -1.185592e-01 -1.258527e-01 -8.891872e-02 2.175478e-01
75% ... 1.056638e+00 1.106943e+00 6.904996e-01 5.894701e-01
max ... 1.958534e+00 2.098540e+00 2.898852e+00 2.828594e+00
989 990 991 992 993 \
count 2.056000e+03 2.056000e+03 2.056000e+03 2.056000e+03 2.056000e+03
mean 2.159967e-17 -6.911894e-17 4.147137e-17 5.529515e-17 6.911894e-18
std 1.000243e+00 1.000243e+00 1.000243e+00 1.000243e+00 1.000243e+00
min -3.079631e+00 -2.054193e+00 -2.730016e+00 -2.293154e+00 -1.606932e+00
25% -5.118428e-01 -3.982982e-01 -5.404825e-01 -4.901066e-01 -1.073690e+00
50% -1.167984e-01 -1.223158e-01 -1.499447e-02 -2.456255e-01 7.698858e-02
75% 5.745292e-01 7.669609e-01 5.104935e-01 1.099020e+00 4.137727e-01
max 3.043556e+00 2.116208e+00 3.137934e+00 1.985264e+00 2.322216e+00
994
count 2.056000e+03
mean -2.764758e-17
std 1.000243e+00
min -1.618510e+00
25% -9.923127e-01
50% 2.223585e-01
75% 4.939620e-01
max 2.576255e+00
[8 rows x 1000 columns]
# Idiom fix: one import per line with conventional aliases, instead of the
# original `import pandas,numpy,seaborn` and `matplotlib.pyplot as plot`.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the standardized dataset.
dataset = pd.read_csv('standardize_edilmis_veri.csv')

# Pairwise correlation matrix, then its absolute value.
correlation_of_dataset = dataset.corr()
absolute_of_corr = np.abs(correlation_of_dataset)

# Min-max rescale into [0, 1].
# NOTE(review): DataFrame.min()/.max() reduce per COLUMN, so each column is
# rescaled against its own range rather than one global min/max — confirm
# this is the intended normalization.
normalized_corr = (absolute_of_corr - absolute_of_corr.min()) / (absolute_of_corr.max() - absolute_of_corr.min())

# Render the heatmap.
plt.figure(figsize=(50, 30))
sns.heatmap(normalized_corr, annot=True, cmap='coolwarm', vmin=0, vmax=1,
            fmt='.2f', annot_kws={'size': 10}, linewidth=.9)
plt.title('Normalized Absolute Cross-Correlation Map')
plt.show()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the label-encoded dataset.
veri = pd.read_csv('touchstandard.csv')

# Feature columns to inspect: an arbitrary slice of five signal columns.
# NOTE(review): the original comment claimed "all but the last 6 columns",
# which does not match the [775:780] slice actually taken — confirm intent.
girdi_degiskenleri = veri.columns[775:780]

# Target columns: the leading columns of the frame (all but the last 988;
# with 1000 columns that is the first 12 — the id/label columns plus a few
# signal columns).
cikti_degiskenleri = veri.columns[:-988]

# For each (feature, target) pair draw three panels side by side.
# (Indentation of these loops was lost in the original paste; restored here.)
for girdi_degiskeni in girdi_degiskenleri:
    for cikti_degiskeni in cikti_degiskenleri:
        plt.figure(figsize=(15, 5))

        # Panel 1: histogram (with KDE) of the feature.
        plt.subplot(1, 3, 1)
        sns.histplot(x=girdi_degiskeni, data=veri, kde=True)
        plt.title(f'{girdi_degiskeni} ve {cikti_degiskeni} Histogramı')

        # Panel 2: scatter plot of feature vs. target.
        plt.subplot(1, 3, 2)
        sns.scatterplot(x=girdi_degiskeni, y=cikti_degiskeni, data=veri)
        plt.title(f'{girdi_degiskeni} ve {cikti_degiskeni} Arasındaki İlişki')

        # Panel 3: violin plot of the target distribution per feature value.
        plt.subplot(1, 3, 3)
        sns.violinplot(x=girdi_degiskeni, y=cikti_degiskeni, data=veri)
        plt.title(f'{girdi_degiskeni} ve {cikti_degiskeni} Dağılımı')

        plt.tight_layout()
        plt.show()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the standardized dataset.
veri = pd.read_csv('standardize_edilmis_veri.csv')

# Feature columns to inspect: five signal columns.
# NOTE(review): the original comment said "all but the last 6 columns",
# which does not match the [775:780] slice actually taken — confirm intent.
girdi_degiskenleri = veri.columns[775:780]

# Target columns: all but the last 988 (the first 12 of 1000 columns).
cikti_degiskenleri = veri.columns[:-988]

# Draw one violin plot per (feature, target) pair.
# (Indentation of these loops was lost in the original paste; restored here.)
for girdi_degiskeni in girdi_degiskenleri:
    for cikti_degiskeni in cikti_degiskenleri:
        plt.figure(figsize=(7, 5))
        sns.violinplot(x=girdi_degiskeni, y=cikti_degiskeni, data=veri)
        plt.title(f'{girdi_degiskeni} ve {cikti_degiskeni} Dağılımı')
        plt.tight_layout()
        plt.show()
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import joblib

# Load the dataset.
data = pd.read_csv('touchstandard.csv')

# Split into features and target.
# BUG FIX: the original referenced an undefined name `df`; the frame was
# loaded into `data`, so this cell raised NameError before doing any work.
X = data.drop(columns=["user_id", "touch_type", "touch", "finger", "palm", "fist"])
y = data["user_id"]

# Candidate classifiers to compare.
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC()
}


def evaluate_model(model, X, y):
    """Return the mean accuracy over 10-fold stratified cross-validation."""
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=47)
    scores = cross_val_score(model, X, y, scoring="accuracy", cv=cv)
    return scores.mean()


results = {}
# Evaluate each model: cross-validate on the training split, then score on
# the held-out test split.
for name, model in models.items():
    # Same seed + stratification every iteration, so every model sees the
    # identical split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47, stratify=y)
    # Cross-validation estimate on the training data only.
    mean_accuracy = evaluate_model(model, X_train, y_train)
    # Fit on the training set and score on the test set.
    model.fit(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    results[name] = test_accuracy
    print(f"{name} cross-validation mean accuracy score:", mean_accuracy)
    print(f"{name} test accuracy score:", test_accuracy)

# Select the best performing model based on TEST accuracy (the original
# comment said "mean accuracy", which did not match the code).
best_model = max(results, key=results.get)
print("Best performing model based on test accuracy:", best_model)

# Re-split with the same seed/stratification and retrain the winning model.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47, stratify=y)
best_model_instance = models[best_model]
best_model_instance.fit(X_train, y_train)

# Evaluate the best model on the test set.
test_accuracy = best_model_instance.score(X_test, y_test)
print("Test set accuracy score (best model):", test_accuracy)

# Weighted precision/recall/F1 for the best model.
y_pred = best_model_instance.predict(X_test)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("\nEvaluation metric scores for best model which is {}:".format(best_model))
print("Test set accuracy:", test_accuracy)
print("Test set precision:", precision)
print("Test set recall:", recall)
print("Test set F1 score:", f1)

# BUG FIX: persist the fitted estimator, not the model's NAME string — the
# original pickled "Naive Bayes"/"SVM" etc., which is useless for inference.
# NOTE(review): every classification cell in this notebook writes the same
# .pkl path, so each run overwrites the previous model — consider
# target-specific filenames.
joblib.dump(best_model_instance, 'best_model_isedataset_classification.pkl')
Naive Bayes cross-validation mean accuracy score: 1.0 Naive Bayes test accuracy score: 1.0 Random Forest cross-validation mean accuracy score: 1.0 Random Forest test accuracy score: 1.0 SVM cross-validation mean accuracy score: 1.0 SVM test accuracy score: 1.0 Best performing model based on test accuracy: Naive Bayes Test set accuracy score (best model): 1.0 Evaluation metric scores for best model which is Naive Bayes: Test set accuracy: 1.0 Test set precision: 1.0 Test set recall: 1.0 Test set F1 score: 1.0
['best_model_isedataset_classification.pkl']
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import joblib

# Load the dataset.
data = pd.read_csv('touchstandard.csv')

# Split into features and target.
# BUG FIX: the original referenced an undefined name `df`; the frame was
# loaded into `data`, so this cell raised NameError before doing any work.
X = data.drop(columns=["user_id", "touch_type", "touch", "finger", "palm", "fist"])
y = data["touch_type"]

# Candidate classifiers to compare.
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC()
}


def evaluate_model(model, X, y):
    """Return the mean accuracy over 10-fold stratified cross-validation."""
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=47)
    scores = cross_val_score(model, X, y, scoring="accuracy", cv=cv)
    return scores.mean()


results = {}
# Evaluate each model: cross-validate on the training split, then score on
# the held-out test split.
for name, model in models.items():
    # Same seed + stratification every iteration, so every model sees the
    # identical split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47, stratify=y)
    # Cross-validation estimate on the training data only.
    mean_accuracy = evaluate_model(model, X_train, y_train)
    # Fit on the training set and score on the test set.
    model.fit(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    results[name] = test_accuracy
    print(f"{name} cross-validation mean accuracy score:", mean_accuracy)
    print(f"{name} test accuracy score:", test_accuracy)

# Select the best performing model based on TEST accuracy (the original
# comment said "mean accuracy", which did not match the code).
best_model = max(results, key=results.get)
print("Best performing model based on test accuracy:", best_model)

# Re-split with the same seed/stratification and retrain the winning model.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47, stratify=y)
best_model_instance = models[best_model]
best_model_instance.fit(X_train, y_train)

# Evaluate the best model on the test set.
test_accuracy = best_model_instance.score(X_test, y_test)
print("Test set accuracy score (best model):", test_accuracy)

# Weighted precision/recall/F1 for the best model.
y_pred = best_model_instance.predict(X_test)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("\nEvaluation metric scores for best model which is {}:".format(best_model))
print("Test set accuracy:", test_accuracy)
print("Test set precision:", precision)
print("Test set recall:", recall)
print("Test set F1 score:", f1)

# BUG FIX: persist the fitted estimator, not the model's NAME string — the
# original pickled "Naive Bayes"/"SVM" etc., which is useless for inference.
# NOTE(review): every classification cell in this notebook writes the same
# .pkl path, so each run overwrites the previous model — consider
# target-specific filenames.
joblib.dump(best_model_instance, 'best_model_isedataset_classification.pkl')
Naive Bayes cross-validation mean accuracy score: 0.506160384331116 Naive Bayes test accuracy score: 0.48058252427184467 Random Forest cross-validation mean accuracy score: 0.6015779748706579 Random Forest test accuracy score: 0.6043689320388349 SVM cross-validation mean accuracy score: 0.5967147080561713 SVM test accuracy score: 0.5898058252427184 Best performing model based on test accuracy: Random Forest Test set accuracy score (best model): 0.6092233009708737 Evaluation metric scores for best model which is Random Forest: Test set accuracy: 0.6092233009708737 Test set precision: 0.48814618940372195 Test set recall: 0.6092233009708737 Test set F1 score: 0.52633754730872
/Users/ardilsilanaydin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_classification.py:1469: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))
['best_model_isedataset_classification.pkl']
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import joblib

# Load the dataset.
data = pd.read_csv('touchstandard.csv')

# Split into features and target.
# BUG FIX: the original referenced an undefined name `df`; the frame was
# loaded into `data`, so this cell raised NameError before doing any work.
X = data.drop(columns=["user_id", "touch_type", "touch", "finger", "palm", "fist"])
y = data["touch"]

# Candidate classifiers to compare.
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC()
}


def evaluate_model(model, X, y):
    """Return the mean accuracy over 10-fold stratified cross-validation."""
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=47)
    scores = cross_val_score(model, X, y, scoring="accuracy", cv=cv)
    return scores.mean()


results = {}
# Evaluate each model: cross-validate on the training split, then score on
# the held-out test split.
for name, model in models.items():
    # Same seed + stratification every iteration, so every model sees the
    # identical split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47, stratify=y)
    # Cross-validation estimate on the training data only.
    mean_accuracy = evaluate_model(model, X_train, y_train)
    # Fit on the training set and score on the test set.
    model.fit(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    results[name] = test_accuracy
    print(f"{name} cross-validation mean accuracy score:", mean_accuracy)
    print(f"{name} test accuracy score:", test_accuracy)

# Select the best performing model based on TEST accuracy (the original
# comment said "mean accuracy", which did not match the code).
best_model = max(results, key=results.get)
print("Best performing model based on test accuracy:", best_model)

# Re-split with the same seed/stratification and retrain the winning model.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47, stratify=y)
best_model_instance = models[best_model]
best_model_instance.fit(X_train, y_train)

# Evaluate the best model on the test set.
test_accuracy = best_model_instance.score(X_test, y_test)
print("Test set accuracy score (best model):", test_accuracy)

# Weighted precision/recall/F1 for the best model.
y_pred = best_model_instance.predict(X_test)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("\nEvaluation metric scores for best model which is {}:".format(best_model))
print("Test set accuracy:", test_accuracy)
print("Test set precision:", precision)
print("Test set recall:", recall)
print("Test set F1 score:", f1)

# BUG FIX: persist the fitted estimator, not the model's NAME string — the
# original pickled "Naive Bayes"/"SVM" etc., which is useless for inference.
# NOTE(review): every classification cell in this notebook writes the same
# .pkl path, so each run overwrites the previous model — consider
# target-specific filenames.
joblib.dump(best_model_instance, 'best_model_isedataset_classification.pkl')
Naive Bayes cross-validation mean accuracy score: 0.8448928307464894 Naive Bayes test accuracy score: 0.8470873786407767 Random Forest cross-validation mean accuracy score: 0.8600776053215078 Random Forest test accuracy score: 0.8592233009708737 SVM cross-validation mean accuracy score: 0.8582852919438286 SVM test accuracy score: 0.8689320388349514 Best performing model based on test accuracy: SVM Test set accuracy score (best model): 0.8689320388349514 Evaluation metric scores for best model which is SVM: Test set accuracy: 0.8689320388349514 Test set precision: 0.8710825193680792 Test set recall: 0.8689320388349514 Test set F1 score: 0.867885955027554
['best_model_isedataset_classification.pkl']
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import joblib

# Load the dataset.
data = pd.read_csv('touchstandard.csv')

# Split into features and target.
# BUG FIX: the original referenced an undefined name `df`; the frame was
# loaded into `data`, so this cell raised NameError before doing any work.
X = data.drop(columns=["user_id", "touch_type", "touch", "finger", "palm", "fist"])
y = data["finger"]

# Candidate classifiers to compare.
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC()
}


def evaluate_model(model, X, y):
    """Return the mean accuracy over 10-fold stratified cross-validation."""
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=47)
    scores = cross_val_score(model, X, y, scoring="accuracy", cv=cv)
    return scores.mean()


results = {}
# Evaluate each model: cross-validate on the training split, then score on
# the held-out test split.
for name, model in models.items():
    # Same seed + stratification every iteration, so every model sees the
    # identical split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47, stratify=y)
    # Cross-validation estimate on the training data only.
    mean_accuracy = evaluate_model(model, X_train, y_train)
    # Fit on the training set and score on the test set.
    model.fit(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    results[name] = test_accuracy
    print(f"{name} cross-validation mean accuracy score:", mean_accuracy)
    print(f"{name} test accuracy score:", test_accuracy)

# Select the best performing model based on TEST accuracy (the original
# comment said "mean accuracy", which did not match the code).
best_model = max(results, key=results.get)
print("Best performing model based on test accuracy:", best_model)

# Re-split with the same seed/stratification and retrain the winning model.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47, stratify=y)
best_model_instance = models[best_model]
best_model_instance.fit(X_train, y_train)

# Evaluate the best model on the test set.
test_accuracy = best_model_instance.score(X_test, y_test)
print("Test set accuracy score (best model):", test_accuracy)

# Weighted precision/recall/F1 for the best model.
y_pred = best_model_instance.predict(X_test)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("\nEvaluation metric scores for best model which is {}:".format(best_model))
print("Test set accuracy:", test_accuracy)
print("Test set precision:", precision)
print("Test set recall:", recall)
print("Test set F1 score:", f1)

# BUG FIX: persist the fitted estimator, not the model's NAME string — the
# original pickled "Naive Bayes"/"SVM" etc., which is useless for inference.
# NOTE(review): every classification cell in this notebook writes the same
# .pkl path, so each run overwrites the previous model — consider
# target-specific filenames.
joblib.dump(best_model_instance, 'best_model_isedataset_classification.pkl')
Naive Bayes cross-validation mean accuracy score: 0.7135070214338507 Naive Bayes test accuracy score: 0.7184466019417476 Random Forest cross-validation mean accuracy score: 0.873481152993348 Random Forest test accuracy score: 0.8737864077669902 SVM cross-validation mean accuracy score: 0.873481152993348 SVM test accuracy score: 0.8737864077669902 Best performing model based on test accuracy: Random Forest Test set accuracy score (best model): 0.8737864077669902 Evaluation metric scores for best model which is Random Forest: Test set accuracy: 0.8737864077669902 Test set precision: 0.763502686398341 Test set recall: 0.8737864077669902 Test set F1 score: 0.8149303284873484
/Users/ardilsilanaydin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_classification.py:1469: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))
['best_model_isedataset_classification.pkl']
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import joblib

# Load the dataset.
data = pd.read_csv('touchstandard.csv')

# Split into features and target.
# BUG FIX: the original referenced an undefined name `df`; the frame was
# loaded into `data`, so this cell raised NameError before doing any work.
X = data.drop(columns=["user_id", "touch_type", "touch", "finger", "palm", "fist"])
y = data["palm"]

# Candidate classifiers to compare.
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC()
}


def evaluate_model(model, X, y):
    """Return the mean accuracy over 10-fold stratified cross-validation."""
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=47)
    scores = cross_val_score(model, X, y, scoring="accuracy", cv=cv)
    return scores.mean()


results = {}
# Evaluate each model: cross-validate on the training split, then score on
# the held-out test split.
for name, model in models.items():
    # Same seed + stratification every iteration, so every model sees the
    # identical split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47, stratify=y)
    # Cross-validation estimate on the training data only.
    mean_accuracy = evaluate_model(model, X_train, y_train)
    # Fit on the training set and score on the test set.
    model.fit(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    results[name] = test_accuracy
    print(f"{name} cross-validation mean accuracy score:", mean_accuracy)
    print(f"{name} test accuracy score:", test_accuracy)

# Select the best performing model based on TEST accuracy (the original
# comment said "mean accuracy", which did not match the code).
best_model = max(results, key=results.get)
print("Best performing model based on test accuracy:", best_model)

# Re-split with the same seed/stratification and retrain the winning model.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47, stratify=y)
best_model_instance = models[best_model]
best_model_instance.fit(X_train, y_train)

# Evaluate the best model on the test set.
test_accuracy = best_model_instance.score(X_test, y_test)
print("Test set accuracy score (best model):", test_accuracy)

# Weighted precision/recall/F1 for the best model.
y_pred = best_model_instance.predict(X_test)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("\nEvaluation metric scores for best model which is {}:".format(best_model))
print("Test set accuracy:", test_accuracy)
print("Test set precision:", precision)
print("Test set recall:", recall)
print("Test set F1 score:", f1)

# BUG FIX: persist the fitted estimator, not the model's NAME string — the
# original pickled "Naive Bayes"/"SVM" etc., which is useless for inference.
# NOTE(review): every classification cell in this notebook writes the same
# .pkl path, so each run overwrites the previous model — consider
# target-specific filenames.
joblib.dump(best_model_instance, 'best_model_isedataset_classification.pkl')
Naive Bayes cross-validation mean accuracy score: 0.8600923872875091 Naive Bayes test accuracy score: 0.8422330097087378 Random Forest cross-validation mean accuracy score: 0.9495232815964524 Random Forest test accuracy score: 0.9320388349514563 SVM cross-validation mean accuracy score: 0.9452512934220252 SVM test accuracy score: 0.9296116504854369 Best performing model based on test accuracy: Random Forest Test set accuracy score (best model): 0.941747572815534 Evaluation metric scores for best model which is Random Forest: Test set accuracy: 0.941747572815534 Test set precision: 0.9439423611447499 Test set recall: 0.941747572815534 Test set F1 score: 0.9370292269714351
['best_model_isedataset_classification.pkl']
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import joblib

# Load the dataset.
data = pd.read_csv('touchstandard.csv')

# Split into features and target.
# BUG FIX: the original referenced an undefined name `df`; the frame was
# loaded into `data`, so this cell raised NameError before doing any work.
X = data.drop(columns=["user_id", "touch_type", "touch", "finger", "palm", "fist"])
y = data["fist"]

# Candidate classifiers to compare.
models = {
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC()
}


def evaluate_model(model, X, y):
    """Return the mean accuracy over 10-fold stratified cross-validation."""
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=47)
    scores = cross_val_score(model, X, y, scoring="accuracy", cv=cv)
    return scores.mean()


results = {}
# Evaluate each model: cross-validate on the training split, then score on
# the held-out test split.
for name, model in models.items():
    # Same seed + stratification every iteration, so every model sees the
    # identical split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47, stratify=y)
    # Cross-validation estimate on the training data only.
    mean_accuracy = evaluate_model(model, X_train, y_train)
    # Fit on the training set and score on the test set.
    model.fit(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    results[name] = test_accuracy
    print(f"{name} cross-validation mean accuracy score:", mean_accuracy)
    print(f"{name} test accuracy score:", test_accuracy)

# Select the best performing model based on TEST accuracy (the original
# comment said "mean accuracy", which did not match the code).
best_model = max(results, key=results.get)
print("Best performing model based on test accuracy:", best_model)

# Re-split with the same seed/stratification and retrain the winning model.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47, stratify=y)
best_model_instance = models[best_model]
best_model_instance.fit(X_train, y_train)

# Evaluate the best model on the test set.
test_accuracy = best_model_instance.score(X_test, y_test)
print("Test set accuracy score (best model):", test_accuracy)

# Weighted precision/recall/F1 for the best model.
y_pred = best_model_instance.predict(X_test)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("\nEvaluation metric scores for best model which is {}:".format(best_model))
print("Test set accuracy:", test_accuracy)
print("Test set precision:", precision)
print("Test set recall:", recall)
print("Test set F1 score:", f1)

# BUG FIX: persist the fitted estimator, not the model's NAME string — the
# original pickled "Naive Bayes"/"SVM" etc., which is useless for inference.
# NOTE(review): every classification cell in this notebook writes the same
# .pkl path, so each run overwrites the previous model — consider
# target-specific filenames.
joblib.dump(best_model_instance, 'best_model_isedataset_classification.pkl')
Naive Bayes cross-validation mean accuracy score: 0.7457169253510717 Naive Bayes test accuracy score: 0.7135922330097088 Random Forest cross-validation mean accuracy score: 0.8497560975609757 Random Forest test accuracy score: 0.8398058252427184 SVM cross-validation mean accuracy score: 0.8473244641537324 SVM test accuracy score: 0.8470873786407767 Best performing model based on test accuracy: SVM Test set accuracy score (best model): 0.8470873786407767 Evaluation metric scores for best model which is SVM: Test set accuracy: 0.8470873786407767 Test set precision: 0.7175570270525026 Test set recall: 0.8470873786407767 Test set F1 score: 0.776960565428728
/Users/ardilsilanaydin/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_classification.py:1469: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))
['best_model_isedataset_classification.pkl']